const https = require("https");
const cheerio = require("cheerio");
const fs = require("fs");

// Accumulator for every scraped movie record. The binding is never reassigned
// (entries are only appended), so it is declared `const`.
const allMovies = [];
const total = 250; // total number of entries to crawl
const pageSize = 25; // number of entries per page
const url = "https://movie.douban.com/top250"; // base URL of the paginated list

/**
 * Crawls the paginated list at `url` one page at a time, starting at `page`,
 * and appends one record per movie to the module-level `allMovies` array.
 * Once the computed offset reaches `total`, the collected records are written
 * to `./top250.json` as pretty-printed JSON.
 *
 * Pages are fetched sequentially: the next request is only issued after the
 * current response has been fully received and parsed. If a page fails
 * (network error, response-stream error, or a non-200 status) the failure is
 * logged and crawling stops without writing the output file.
 *
 * @param {number} page - 1-based page number to fetch.
 * @returns {void}
 */
function crawler(page) {
  const start = (page - 1) * pageSize;

  // `>=` instead of `===`: if `pageSize` does not evenly divide `total`, or the
  // caller starts past the last page, an equality test would never match and
  // the recursion would never terminate.
  if (start >= total) {
    fs.writeFile(
      "./top250.json",
      JSON.stringify(allMovies, null, 2),
      (err) => {
        if (err) throw err;
        console.log("爬取并写入文件完成");
      }
    );
    return;
  }

  // Shared by every failure path so they all report the same way.
  const reportFailure = (err) => {
    console.error(`爬取第${page}页失败`, err);
  };

  https
    .get(
      `${url}?start=${start}`,
      {
        headers: {
          // NOTE(review): "xxx" looks like a placeholder cookie value — confirm.
          cookie: "xxx",
        },
      },
      (res) => {
        // Anything other than 200 (redirect, block page, server error) does not
        // carry the expected list markup; parsing it would silently drop a
        // whole page from the output.
        if (res.statusCode !== 200) {
          res.resume(); // discard the body so the socket can be released
          reportFailure(new Error(`unexpected HTTP status ${res.statusCode}`));
          return;
        }

        // Decode as UTF-8 at the stream level. Without this, chunks arrive as
        // Buffers and are stringified one by one, which corrupts any
        // multi-byte character that straddles a chunk boundary.
        res.setEncoding("utf8");

        let html = "";
        res.on("data", (chunk) => {
          html += chunk;
        });
        res.on("error", reportFailure);
        res.on("end", () => {
          const $ = cheerio.load(html);

          // A regular function is required here: cheerio binds `this` to the
          // current element, which is then used as the selector context.
          $("#content li .item").each(function () {
            const title = $(".info .title", this).text();
            const other = $(".info .other", this).text();
            const star = $(".info .bd .rating_num", this).text();
            const pic = $(".pic img", this).attr("src");
            allMovies.push({ title: title + other, star, pic });
          });

          // Move on to the next page without mutating the parameter.
          crawler(page + 1);
        });
      }
    )
    .on("error", reportFailure);
}

crawler(1); // start crawling from the first page
